# Global knitr chunk options for this report: echo the code and show
# warnings, messages and errors inline.
knitr::opts_chunk$set(
  echo = TRUE,    # print the R source of each chunk
  error = TRUE,   # keep going when a chunk errors (usually better for debugging)
  message = TRUE, # show messages during codebook generation
  warning = TRUE  # show warnings during codebook generation
)
# An infinite split width keeps pander from breaking wide tables apart.
pander::panderOptions("table.split.table", Inf)
# Make the black-and-white theme the ggplot2 default.
ggplot2::theme_set(ggplot2::theme_bw())
# load libraries
library(codebook)
library(here)
library(dplyr)
library(tidyverse)
library(future)
library(labelled)
This is a data dictionary for the data set used in the paper “Patterns of language switching and bilingual children’s word learning: An experiment across two communities”.
# Read the cleaned dataset; the path is resolved with here().
data_clean <- here("analysis/data_clean.csv") %>%
  read.csv()
# Item-level codebook table for every column of the dataset.
codebook_items(data_clean)
This dataset includes 35 French-English bilingual children (19 girls) and 27 Spanish-English bilingual children (13 girls) who participated in our experiment.
# Age (in months) by language community.
# The data hold several rows per subject, so keep one row per subject_id
# before summarising; otherwise each child would be weighted by row count.
data_clean %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  group_by(lang_comm) %>%
  summarize(
    mean_age_in_months = mean(age_in_months, na.rm = TRUE),
    sd = sd(age_in_months, na.rm = TRUE),
    min = min(age_in_months, na.rm = TRUE),
    max = max(age_in_months, na.rm = TRUE)
  )
## # A tibble: 2 x 5
## lang_comm mean_age_in_months sd min max
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 French-English 49.0 7.12 36.5 60.6
## 2 Spanish-English 49.3 9.21 36.3 63.0
# Parental education by language community: count and percentage of
# children at each education level (one row per subject_id).
data_clean %>%
  select(subject_id, lang_comm, parent_edu_level) %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  # total_n = number of children in the community (denominator for percent)
  group_by(lang_comm) %>%
  mutate(total_n = n()) %>%
  select(lang_comm, parent_edu_level, total_n) %>%
  # n = number of children at this education level within the community
  group_by(lang_comm, parent_edu_level) %>%
  mutate(
    n = n(),
    percent = round(n / total_n * 100)
  ) %>%
  # collapse to one row per community x education level
  distinct(parent_edu_level, .keep_all = TRUE)
## # A tibble: 12 x 5
## # Groups: lang_comm, parent_edu_level [12]
## lang_comm parent_edu_level total_n n percent
## <chr> <chr> <int> <int> <dbl>
## 1 French-English Attestation of College Studies (AÉC) 35 1 3
## 2 French-English College Certificate/Diploma 35 6 17
## 3 French-English Master’s Degree 35 8 23
## 4 French-English Bachelor’s Degree 35 13 37
## 5 French-English Doctoral Degree 35 3 9
## 6 French-English Some College/University 35 4 11
## 7 Spanish-English Bachelor’s Degree 27 5 19
## 8 Spanish-English Master’s Degree 27 14 52
## 9 Spanish-English Doctoral Degree 27 2 7
## 10 Spanish-English College Certificate/Diploma 27 2 7
## 11 Spanish-English Some College/University 27 1 4
## 12 Spanish-English <NA> 27 3 11
# Per-child language exposure in long format: one row per subject and
# language ("English" vs. "French/Spanish"), used for the individual dots
# in the exposure plot.
individual_exposure <- data_clean %>%
  select(subject_id, lang_comm, matches("exposure")) %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  # Merge the community-specific columns into a single non-English column:
  # French exposure for French-English children, Spanish exposure for
  # Spanish-English children, NA for any other community value.
  mutate(
    global_exposure_fr_sp = case_when(
      lang_comm == "French-English" ~ as.numeric(global_exposure_fr),
      lang_comm == "Spanish-English" ~ as.numeric(global_exposure_sp),
      TRUE ~ NA_real_
    )
  ) %>%
  select(subject_id, lang_comm, global_exposure_eng, global_exposure_fr_sp) %>%
  pivot_longer(
    -c(subject_id, lang_comm),
    names_to = "language",
    values_to = "exposure"
  ) %>%
  mutate(
    language = recode(
      language,
      "global_exposure_eng" = "English",
      "global_exposure_fr_sp" = "French/Spanish"
    )
  )
# Mean language exposure per community and language (bars, 95% CI error
# bars) with each child's value overlaid as a dot.
# Starts from `individual_exposure` (the same long-format table the dots
# use) instead of rebuilding it from `data_clean` a second time.
individual_exposure %>%
  group_by(lang_comm, language) %>%
  summarize(
    mean_exposure = mean(exposure, na.rm = TRUE),
    sd_exposure = sd(exposure, na.rm = TRUE),
    min_exposure = min(exposure, na.rm = TRUE),
    max_exposure = max(exposure, na.rm = TRUE),
    # Divide by the number of non-missing values: mean/sd drop NAs, so
    # n() (which counts NA rows too) would understate the standard error.
    se = sd_exposure / sqrt(sum(!is.na(exposure))),
    CI_lower = mean_exposure - (1.96 * se),
    CI_upper = mean_exposure + (1.96 * se)
  ) %>%
  # Same column name as in `individual_exposure`, so both layers share `y`.
  rename(exposure = mean_exposure) %>%
  ggplot(aes(x = lang_comm, y = exposure, fill = language, color = language)) +
  geom_col(position = position_dodge(width = 0.9), alpha = .25) +
  geom_errorbar(
    aes(ymin = CI_lower, ymax = CI_upper),
    width = .2,
    position = position_dodge(.9),
    color = "#4d4d4d",
    alpha = 0.75
  ) +
  geom_dotplot(
    data = individual_exposure,
    aes(y = exposure, x = lang_comm, fill = language, color = language),
    binaxis = "y",
    stackdir = "center",
    position = position_dodge(0.9),
    stackratio = 1,
    dotsize = 0.5,
    alpha = 0.8
  ) +
  coord_flip()
# Number of distinct subjects with missing_LEQ == 1.
data_clean %>%
  filter(missing_LEQ == 1) %>%
  pull(subject_id) %>%
  n_distinct()
## [1] 4
# Number of distinct subjects with exclude_preterm == 1.
data_clean %>%
  filter(exclude_preterm == 1) %>%
  pull(subject_id) %>%
  n_distinct()
## [1] 4
# Number of distinct subjects with exclude_language_problem == 1.
data_clean %>%
  filter(exclude_language_problem == 1) %>%
  pull(subject_id) %>%
  n_distinct()
## [1] 1
# Number of distinct subjects with exclude_incomplete == 1.
data_clean %>%
  filter(exclude_incomplete == 1) %>%
  pull(subject_id) %>%
  n_distinct()
## [1] 1
# Number of distinct subjects with exclude_parent == 1.
data_clean %>%
  filter(exclude_parent == 1) %>%
  pull(subject_id) %>%
  n_distinct()
## [1] 1
Note that participants flagged as 1 were excluded from the final analysis.
# Unique values of `test_order` present in the data.
distinct(data_clean, test_order)
## test_order
## 1 en_it
## 2 en_ol
## 3 fr_it
## 4 fr_ol
## 5 sp_it
## 6 sp_ol
# Unique values of `block` present in the data.
distinct(data_clean, block)
## block
## 1 familiar
## 2 block1
## 3 block2
# Unique values of `test_part` present in the data.
distinct(data_clean, test_part)
## test_part
## 1 fam_test
## 2 nov_learn
## 3 nov_test
# Every test_part x language combination that occurs, sorted by test_part
# and returned grouped by test_part.
data_clean %>%
  distinct(test_part, language) %>%
  group_by(test_part) %>%
  arrange(test_part)
## # A tibble: 13 x 2
## # Groups: test_part [3]
## test_part language
## <chr> <chr>
## 1 fam_test english
## 2 fam_test french
## 3 fam_test spanish
## 4 nov_learn english-french
## 5 nov_learn english
## 6 nov_learn french
## 7 nov_learn french-english
## 8 nov_learn english-spanish
## 9 nov_learn spanish
## 10 nov_learn spanish-english
## 11 nov_test english
## 12 nov_test french
## 13 nov_test spanish
# Unique values of `condition` present in the data.
distinct(data_clean, condition)
## condition
## 1 familiar
## 2 immediate_translation
## 3 one_language_at_a_time
# Number of children assigned to each test order, by language community
# (one row per subject_id before counting).
data_clean %>%
  select(subject_id, lang_comm, test_order) %>%
  distinct(subject_id, .keep_all = TRUE) %>%
  count(lang_comm, test_order) %>%
  ggplot(aes(x = lang_comm, y = n, fill = test_order)) +
  geom_col(position = position_dodge(width = 0.9)) +
  # NOTE(review): the y scale is capped at 10; a bar whose count exceeds
  # the limit would be dropped by ggplot2 rather than clipped.
  scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 1))
# Display order of the blocks in the legend and facets.
level_order <- c("familiar", "block1", "block2")

# Number of test rows (fam_test / nov_test) per subject and block,
# one facet per block.
data_clean %>%
  filter(test_part %in% c("fam_test", "nov_test")) %>%
  count(subject_id, block) %>%
  # Convert once, with the argument spelled out (`levels`, not the
  # partially matched `level`); the legend title is then simply "block".
  mutate(block = factor(block, levels = level_order)) %>%
  ggplot(aes(x = subject_id, y = n, fill = block)) +
  geom_col(position = position_dodge(width = 0.9)) +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 1)) +
  facet_grid(block ~ .) +
  coord_flip() +
  theme(
    text = element_text(size = 12),
    axis.text = element_text(size = 12, color = "black"),
    legend.text = element_text(size = 12),
    legend.position = "bottom"
  )